packages = c('rgdal', 'spdep', 'tmap', 'sf', 'ggpubr', 'cluster', 'factoextra', 'NbClust', 'heatmaply', 'corrplot', 'psych', 'tidyverse')
for (p in packages){
if(!require(p, character.only = T)){
install.packages(p)
}
library(p,character.only = T)
}## Warning: package 'factoextra' was built under R version 4.0.3
## Warning: package 'NbClust' was built under R version 4.0.3
## Parsed with column specification:
## cols(
## uen = col_character(),
## registration_year = col_double(),
## primary_ssic_code = col_character(),
## postal_code = col_double(),
## category = col_character(),
## primary_ssic_category_description = col_character(),
## X_coord = col_double(),
## Y_coord = col_double()
## )
corp_info_merged <- st_as_sf(corp_info_merged, coords = c('X_coord','Y_coord'), crs = 3414)
ssic2020 <- read_csv("data/aspatial/ssic2020.csv")## Parsed with column specification:
## cols(
## `SSIC 2020` = col_character(),
## category = col_character(),
## primary_ssic_code = col_character()
## )
## Parsed with column specification:
## cols(
## postal_code = col_double(),
## X_coord = col_double(),
## Y_coord = col_double()
## )
## Reading layer `MP14_SUBZONE_WEB_PL' from data source `D:\GitHub\is415-proj\data\geospatial' using driver `ESRI Shapefile'
## Simple feature collection with 323 features and 15 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: 2667.538 ymin: 15748.72 xmax: 56396.44 ymax: 50256.33
## projected CRS: SVY21
planning_area <- mpsz %>%
group_by(PLN_AREA_N) %>%
summarise(geometry = st_union(geometry)) %>%
ungroup() %>%
filter(!(PLN_AREA_N %in% c("NORTH-EASTERN ISLANDS",
"CENTRAL WATER CATCHMENT",
"CHANGI BAY",
"MARINA SOUTH",
"SIMPANG",
"SOUTHERN ISLANDS",
"STRAITS VIEW",
"TENGAH")))## `summarise()` ungrouping output (override with `.groups` argument)
## Simple feature collection with 47 features and 1 field
## geometry type: GEOMETRY
## dimension: XY
## bbox: xmin: 2667.538 ymin: 15748.72 xmax: 50293.96 ymax: 50256.33
## projected CRS: SVY21
## # A tibble: 47 x 2
## PLN_AREA_N geometry
## * <chr> <POLYGON [m]>
## 1 ANG MO KIO ((31070.93 39048.68, 31072.47 39019.76, 31072.2 38992.06, 3106~
## 2 BEDOK ((39348.48 32017.65, 39346.79 32013.97, 39316.22 31997.18, 393~
## 3 BISHAN ((30988.88 36251.95, 31016.58 36176.08, 31017.98 36172.26, 309~
## 4 BOON LAY ((12643.21 32171.07, 12643.06 32171.43, 12614.5 32239.82, 1261~
## 5 BUKIT BATOK ((20198.51 36532.01, 20205.86 36519.86, 20265.9 36431.76, 2030~
## 6 BUKIT MERAH ((28983.37 28341.95, 29112.53 28345.02, 29182.45 28346.68, 291~
## 7 BUKIT PANJANG ((23235.16 37023.92, 23363.43 36821, 23324.07 36810.26, 23223.~
## 8 BUKIT TIMAH ((23527.63 32853.63, 23397.96 32719.03, 23326.3 32643.58, 2331~
## 9 CHANGI ((45818.88 41497.37, 45838.76 41496.62, 45875.88 41498.13, 458~
## 10 CHOA CHU KANG ((18700.53 39143.88, 18559.17 39008.54, 18298.41 39242.47, 182~
## # ... with 37 more rows
## Coordinate Reference System:
## User input: SVY21
## wkt:
## PROJCRS["SVY21",
## BASEGEOGCRS["SVY21[WGS84]",
## DATUM["World Geodetic System 1984",
## ELLIPSOID["WGS 84",6378137,298.257223563,
## LENGTHUNIT["metre",1]],
## ID["EPSG",6326]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["Degree",0.0174532925199433]]],
## CONVERSION["unnamed",
## METHOD["Transverse Mercator",
## ID["EPSG",9807]],
## PARAMETER["Latitude of natural origin",1.36666666666667,
## ANGLEUNIT["Degree",0.0174532925199433],
## ID["EPSG",8801]],
## PARAMETER["Longitude of natural origin",103.833333333333,
## ANGLEUNIT["Degree",0.0174532925199433],
## ID["EPSG",8802]],
## PARAMETER["Scale factor at natural origin",1,
## SCALEUNIT["unity",1],
## ID["EPSG",8805]],
## PARAMETER["False easting",28001.642,
## LENGTHUNIT["metre",1],
## ID["EPSG",8806]],
## PARAMETER["False northing",38744.572,
## LENGTHUNIT["metre",1],
## ID["EPSG",8807]]],
## CS[Cartesian,2],
## AXIS["(E)",east,
## ORDER[1],
## LENGTHUNIT["metre",1,
## ID["EPSG",9001]]],
## AXIS["(N)",north,
## ORDER[2],
## LENGTHUNIT["metre",1,
## ID["EPSG",9001]]]]
## Coordinate Reference System:
## User input: EPSG:3414
## wkt:
## PROJCRS["SVY21 / Singapore TM",
## BASEGEOGCRS["SVY21",
## DATUM["SVY21",
## ELLIPSOID["WGS 84",6378137,298.257223563,
## LENGTHUNIT["metre",1]]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## ID["EPSG",4757]],
## CONVERSION["Singapore Transverse Mercator",
## METHOD["Transverse Mercator",
## ID["EPSG",9807]],
## PARAMETER["Latitude of natural origin",1.36666666666667,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8801]],
## PARAMETER["Longitude of natural origin",103.833333333333,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8802]],
## PARAMETER["Scale factor at natural origin",1,
## SCALEUNIT["unity",1],
## ID["EPSG",8805]],
## PARAMETER["False easting",28001.642,
## LENGTHUNIT["metre",1],
## ID["EPSG",8806]],
## PARAMETER["False northing",38744.572,
## LENGTHUNIT["metre",1],
## ID["EPSG",8807]]],
## CS[Cartesian,2],
## AXIS["northing (N)",north,
## ORDER[1],
## LENGTHUNIT["metre",1]],
## AXIS["easting (E)",east,
## ORDER[2],
## LENGTHUNIT["metre",1]],
## USAGE[
## SCOPE["unknown"],
## AREA["Singapore"],
## BBOX[1.13,103.59,1.47,104.07]],
## ID["EPSG",3414]]
## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO"): Discarded datum Unknown based on WGS84 ellipsoid in CRS definition,
## but +towgs84= values preserved
## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO"): Discarded datum Unknown based on WGS84 ellipsoid in CRS definition,
## but +towgs84= values preserved
## integer(0)
## [1] "G" "F" "H" "C" "N" "I" "S" "M" "Q" "L" "J" "R" "P" "E" "K" "D" "A" "O"
for (category_id in unique(corp_info_merged$category)) {
corp_with_category <- corp_info_merged %>%
filter(category == category_id)
planning_area_3414[, paste0("Category ", category_id)]<- lengths(st_intersects(planning_area_3414, corp_with_category))
}planning_area_3414 <- planning_area_3414 %>%
mutate(Total = rowSums(across("Category G":"Category O")))## [1] 2396.213
planning_area_3414 <- planning_area_3414 %>%
mutate(`Cat G Prop` = case_when(Total != 0 ~ `Category G`/Total * 1000,
Total == 0 ~ 0)) %>%
mutate(`Cat F Prop` = case_when(Total != 0 ~ `Category F`/Total * 1000,
Total == 0 ~ 0)) %>%
mutate(`Cat H Prop` = case_when(Total != 0 ~ `Category H`/Total * 1000,
Total == 0 ~ 0)) %>%
mutate(`Cat C Prop` = case_when(Total != 0 ~ `Category C`/Total * 1000,
Total == 0 ~ 0)) %>%
mutate(`Cat N Prop` = case_when(Total != 0 ~ `Category N`/Total * 1000,
Total == 0 ~ 0)) %>%
mutate(`Cat I Prop` = case_when(Total != 0 ~ `Category I`/Total * 1000,
Total == 0 ~ 0)) %>%
mutate(`Cat S Prop` = case_when(Total != 0 ~ `Category S`/Total * 1000,
Total == 0 ~ 0)) %>%
mutate(`Cat M Prop` = case_when(Total != 0 ~ `Category M`/Total * 1000,
Total == 0 ~ 0)) %>%
mutate(`Cat Q Prop` = case_when(Total != 0 ~ `Category Q`/Total * 1000,
Total == 0 ~ 0)) %>%
mutate(`Cat L Prop` = case_when(Total != 0 ~ `Category L`/Total * 1000,
Total == 0 ~ 0)) %>%
mutate(`Cat J Prop` = case_when(Total != 0 ~ `Category J`/Total * 1000,
Total == 0 ~ 0)) %>%
mutate(`Cat R Prop` = case_when(Total != 0 ~ `Category R`/Total * 1000,
Total == 0 ~ 0)) %>%
mutate(`Cat P Prop` = case_when(Total != 0 ~ `Category P`/Total * 1000,
Total == 0 ~ 0)) %>%
mutate(`Cat E Prop` = case_when(Total != 0 ~ `Category E`/Total * 1000,
Total == 0 ~ 0)) %>%
mutate(`Cat K Prop` = case_when(Total != 0 ~ `Category K`/Total * 1000,
Total == 0 ~ 0)) %>%
mutate(`Cat D Prop` = case_when(Total != 0 ~ `Category D`/Total * 1000,
Total == 0 ~ 0)) %>%
mutate(`Cat A Prop` = case_when(Total != 0 ~ `Category A`/Total * 1000,
Total == 0 ~ 0)) %>%
mutate(`Cat O Prop` = case_when(Total != 0 ~ `Category O`/Total * 1000,
Total == 0 ~ 0))ggplot(data=planning_area_3414, aes(x=`Category M`)) +
geom_histogram(bins=20, color="black", fill="light blue")ggplot(data=planning_area_3414, aes(x=`Category M`)) +
geom_boxplot(color="black", fill="light blue")ggplot(data=planning_area_3414, aes(x=`Cat M Prop`)) +
geom_histogram(bins=20, color="black", fill="light blue")ggplot(data=planning_area_3414, aes(x=`Cat M Prop`)) +
geom_boxplot(color="black", fill="light blue")cat_g <- ggplot(data=planning_area_3414,
aes(x= `Cat G Prop`)) +
geom_histogram(bins=20,
color="black",
fill="light blue")
cat_f <- ggplot(data=planning_area_3414,
aes(x= `Cat F Prop`)) +
geom_histogram(bins=20,
color="black",
fill="light blue")
cat_h <- ggplot(data=planning_area_3414,
aes(x= `Cat H Prop`)) +
geom_histogram(bins=20,
color="black",
fill="light blue")
cat_c <- ggplot(data=planning_area_3414,
aes(x= `Cat C Prop`)) +
geom_histogram(bins=20,
color="black",
fill="light blue")
cat_n <- ggplot(data=planning_area_3414,
aes(x= `Cat N Prop`)) +
geom_histogram(bins=20,
color="black",
fill="light blue")
cat_i <- ggplot(data=planning_area_3414,
aes(x= `Cat I Prop`)) +
geom_histogram(bins=20,
color="black",
fill="light blue")cat_s <- ggplot(data=planning_area_3414,
aes(x= `Cat S Prop`)) +
geom_histogram(bins=20,
color="black",
fill="light blue")
cat_m <- ggplot(data=planning_area_3414,
aes(x= `Cat M Prop`)) +
geom_histogram(bins=20,
color="black",
fill="light blue")
cat_q <- ggplot(data=planning_area_3414,
aes(x= `Cat Q Prop`)) +
geom_histogram(bins=20,
color="black",
fill="light blue")
cat_l <- ggplot(data=planning_area_3414,
aes(x= `Cat L Prop`)) +
geom_histogram(bins=20,
color="black",
fill="light blue")
cat_j <- ggplot(data=planning_area_3414,
aes(x= `Cat J Prop`)) +
geom_histogram(bins=20,
color="black",
fill="light blue")
cat_r <- ggplot(data=planning_area_3414,
aes(x= `Cat R Prop`)) +
geom_histogram(bins=20,
color="black",
fill="light blue")cat_p <- ggplot(data=planning_area_3414,
aes(x= `Cat P Prop`)) +
geom_histogram(bins=20,
color="black",
fill="light blue")
cat_e <- ggplot(data=planning_area_3414,
aes(x= `Cat E Prop`)) +
geom_histogram(bins=20,
color="black",
fill="light blue")
cat_k <- ggplot(data=planning_area_3414,
aes(x= `Cat K Prop`)) +
geom_histogram(bins=20,
color="black",
fill="light blue")
cat_d <- ggplot(data=planning_area_3414,
aes(x= `Cat D Prop`)) +
geom_histogram(bins=20,
color="black",
fill="light blue")
cat_a <- ggplot(data=planning_area_3414,
aes(x= `Cat A Prop`)) +
geom_histogram(bins=20,
color="black",
fill="light blue")
cat_o <- ggplot(data=planning_area_3414,
aes(x= `Cat O Prop`)) +
geom_histogram(bins=20,
color="black",
fill="light blue")cluster_vars <- planning_area_3414_derived %>%
select("PLN_AREA_N", ends_with("Prop"))
head(cluster_vars,10)## # A tibble: 10 x 19
## PLN_AREA_N `Cat G Prop` `Cat F Prop` `Cat H Prop` `Cat C Prop` `Cat N Prop`
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ANG MO KIO 285. 70.9 69.8 35.1 49.4
## 2 BEDOK 266. 56.3 55.5 42.2 49.3
## 3 BISHAN 271. 47.8 55.4 30.2 53.9
## 4 BOON LAY 271. 153. 104. 164. 41.1
## 5 BUKIT BAT~ 281. 70.3 81.7 43.0 59.9
## 6 BUKIT MER~ 236. 35.2 54.7 28.2 47.5
## 7 BUKIT PAN~ 271. 41.8 116. 37.0 63.7
## 8 BUKIT TIM~ 208. 22.1 32.2 22.5 47.9
## 9 CHANGI 256. 56.8 227. 45.5 39.8
## 10 CHOA CHU ~ 266. 57.3 133. 34.1 65.5
## # ... with 13 more variables: `Cat I Prop` <dbl>, `Cat S Prop` <dbl>, `Cat M
## # Prop` <dbl>, `Cat Q Prop` <dbl>, `Cat L Prop` <dbl>, `Cat J Prop` <dbl>,
## # `Cat R Prop` <dbl>, `Cat P Prop` <dbl>, `Cat E Prop` <dbl>, `Cat K
## # Prop` <dbl>, `Cat D Prop` <dbl>, `Cat A Prop` <dbl>, `Cat O Prop` <dbl>
## Warning: Setting row names on a tibble is deprecated.
## # A tibble: 10 x 19
## PLN_AREA_N `Cat G Prop` `Cat F Prop` `Cat H Prop` `Cat C Prop` `Cat N Prop`
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ANG MO KIO 285. 70.9 69.8 35.1 49.4
## 2 BEDOK 266. 56.3 55.5 42.2 49.3
## 3 BISHAN 271. 47.8 55.4 30.2 53.9
## 4 BOON LAY 271. 153. 104. 164. 41.1
## 5 BUKIT BAT~ 281. 70.3 81.7 43.0 59.9
## 6 BUKIT MER~ 236. 35.2 54.7 28.2 47.5
## 7 BUKIT PAN~ 271. 41.8 116. 37.0 63.7
## 8 BUKIT TIM~ 208. 22.1 32.2 22.5 47.9
## 9 CHANGI 256. 56.8 227. 45.5 39.8
## 10 CHOA CHU ~ 266. 57.3 133. 34.1 65.5
## # ... with 13 more variables: `Cat I Prop` <dbl>, `Cat S Prop` <dbl>, `Cat M
## # Prop` <dbl>, `Cat Q Prop` <dbl>, `Cat L Prop` <dbl>, `Cat J Prop` <dbl>,
## # `Cat R Prop` <dbl>, `Cat P Prop` <dbl>, `Cat E Prop` <dbl>, `Cat K
## # Prop` <dbl>, `Cat D Prop` <dbl>, `Cat A Prop` <dbl>, `Cat O Prop` <dbl>
## Warning: Setting row names on a tibble is deprecated.
## # A tibble: 10 x 18
## `Cat G Prop` `Cat F Prop` `Cat H Prop` `Cat C Prop` `Cat N Prop` `Cat I Prop`
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 285. 70.9 69.8 35.1 49.4 76.6
## 2 266. 56.3 55.5 42.2 49.3 55.0
## 3 271. 47.8 55.4 30.2 53.9 48.6
## 4 271. 153. 104. 164. 41.1 35.6
## 5 281. 70.3 81.7 43.0 59.9 48.8
## 6 236. 35.2 54.7 28.2 47.5 81.8
## 7 271. 41.8 116. 37.0 63.7 50.7
## 8 208. 22.1 32.2 22.5 47.9 62.8
## 9 256. 56.8 227. 45.5 39.8 159.
## 10 266. 57.3 133. 34.1 65.5 71.2
## # ... with 12 more variables: `Cat S Prop` <dbl>, `Cat M Prop` <dbl>, `Cat Q
## # Prop` <dbl>, `Cat L Prop` <dbl>, `Cat J Prop` <dbl>, `Cat R Prop` <dbl>,
## # `Cat P Prop` <dbl>, `Cat E Prop` <dbl>, `Cat K Prop` <dbl>, `Cat D
## # Prop` <dbl>, `Cat A Prop` <dbl>, `Cat O Prop` <dbl>
## Cat G Prop Cat F Prop Cat H Prop Cat C Prop
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.5520 1st Qu.:0.1203 1st Qu.:0.1350 1st Qu.:0.1261
## Median :0.6537 Median :0.2728 Median :0.2493 Median :0.1690
## Mean :0.5962 Mean :0.2813 Mean :0.2853 Mean :0.2520
## 3rd Qu.:0.6891 3rd Qu.:0.3721 3rd Qu.:0.4236 3rd Qu.:0.2431
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## Cat N Prop Cat I Prop Cat S Prop Cat M Prop
## Min. :0.0000 Min. :0.00000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.2725 1st Qu.:0.04737 1st Qu.:0.2520 1st Qu.:0.4437
## Median :0.3459 Median :0.06124 Median :0.4363 Median :0.5501
## Mean :0.3451 Mean :0.08322 Mean :0.4040 Mean :0.5322
## 3rd Qu.:0.3883 3rd Qu.:0.07372 3rd Qu.:0.5241 3rd Qu.:0.6377
## Max. :1.0000 Max. :1.00000 Max. :1.0000 Max. :1.0000
## Cat Q Prop Cat L Prop Cat J Prop Cat R Prop
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.1290 1st Qu.:0.1395 1st Qu.:0.2347 1st Qu.:0.2210
## Median :0.1930 Median :0.3167 Median :0.3065 Median :0.3097
## Mean :0.2351 Mean :0.2990 Mean :0.3341 Mean :0.3279
## 3rd Qu.:0.2755 3rd Qu.:0.3795 3rd Qu.:0.4391 3rd Qu.:0.4036
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## Cat P Prop Cat E Prop Cat K Prop Cat D Prop
## Min. :0.0000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.3090 1st Qu.:0.01327 1st Qu.:0.08158 1st Qu.:0.00000
## Median :0.5315 Median :0.02253 Median :0.16233 Median :0.08987
## Mean :0.4641 Mean :0.07622 Mean :0.20481 Mean :0.14194
## 3rd Qu.:0.6175 3rd Qu.:0.05097 3rd Qu.:0.24424 3rd Qu.:0.19562
## Max. :1.0000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## Cat A Prop Cat O Prop
## Min. :0.000000 Min. :0.00000
## 1st Qu.:0.005422 1st Qu.:0.00000
## Median :0.011862 Median :0.00000
## Mean :0.041839 Mean :0.08626
## 3rd Qu.:0.019626 3rd Qu.:0.00000
## Max. :1.000000 Max. :1.00000
## Cat G Prop Cat F Prop Cat H Prop Cat C Prop
## Min. :-3.2891 Min. :-1.26781 Min. :-1.4063 Min. :-1.08528
## 1st Qu.:-0.2443 1st Qu.:-0.72563 1st Qu.:-0.7407 1st Qu.:-0.54217
## Median : 0.3171 Median :-0.03849 Median :-0.1776 Median :-0.35773
## Mean : 0.0000 Mean : 0.00000 Mean : 0.0000 Mean : 0.00000
## 3rd Qu.: 0.5123 3rd Qu.: 0.40894 3rd Qu.: 0.6816 3rd Qu.:-0.03851
## Max. : 2.2273 Max. : 3.23879 Max. : 3.5230 Max. : 3.22094
## Cat N Prop Cat I Prop Cat S Prop Cat M Prop
## Min. :-2.401438 Min. :-0.59324 Min. :-1.9315 Min. :-2.51272
## 1st Qu.:-0.504996 1st Qu.:-0.25554 1st Qu.:-0.7267 1st Qu.:-0.41756
## Median : 0.005169 Median :-0.15669 Median : 0.1544 Median : 0.08464
## Mean : 0.000000 Mean : 0.00000 Mean : 0.0000 Mean : 0.00000
## 3rd Qu.: 0.300250 3rd Qu.:-0.06767 3rd Qu.: 0.5740 3rd Qu.: 0.49830
## Max. : 4.556796 Max. : 6.53578 Max. : 2.8490 Max. : 2.20878
## Cat Q Prop Cat L Prop Cat J Prop Cat R Prop
## Min. :-1.1376 Min. :-1.35192 Min. :-1.6015 Min. :-1.64930
## 1st Qu.:-0.5132 1st Qu.:-0.72102 1st Qu.:-0.4765 1st Qu.:-0.53792
## Median :-0.2037 Median : 0.08004 Median :-0.1321 Median :-0.09156
## Mean : 0.0000 Mean : 0.00000 Mean : 0.0000 Mean : 0.00000
## 3rd Qu.: 0.1954 3rd Qu.: 0.36412 3rd Qu.: 0.5034 3rd Qu.: 0.38078
## Max. : 3.7003 Max. : 3.17018 Max. : 3.1919 Max. : 3.38011
## Cat P Prop Cat E Prop Cat K Prop Cat D Prop
## Min. :-1.8734 Min. :-0.4334 Min. :-1.0844 Min. :-0.7178
## 1st Qu.:-0.6262 1st Qu.:-0.3579 1st Qu.:-0.6524 1st Qu.:-0.7178
## Median : 0.2719 Median :-0.3052 Median :-0.2249 Median :-0.2633
## Mean : 0.0000 Mean : 0.0000 Mean : 0.0000 Mean : 0.0000
## 3rd Qu.: 0.6192 3rd Qu.:-0.1436 3rd Qu.: 0.2088 3rd Qu.: 0.2714
## Max. : 2.1631 Max. : 5.2525 Max. : 4.2101 Max. : 4.3392
## Cat A Prop Cat O Prop
## Min. :-0.2732 Min. :-0.3554
## 1st Qu.:-0.2378 1st Qu.:-0.3554
## Median :-0.1957 Median :-0.3554
## Mean : 0.0000 Mean : 0.0000
## 3rd Qu.:-0.1450 3rd Qu.:-0.3554
## Max. : 6.2558 Max. : 3.7644
r <- ggplot(data=planning_area_3414_derived,
aes(x= `Cat M Prop`)) +
geom_histogram(bins=20,
color="black",
fill="light blue")
sg_business_minmax_df <- as.data.frame(sg_business.minmax)
s <- ggplot(data=sg_business_minmax_df,
aes(x=`Cat M Prop`)) +
geom_histogram(bins=20,
color="black",
fill="light blue") +
ggtitle("Min-Max Standardisation")
sg_business_zscore_df <- as.data.frame(sg_business.zscore)
z <- ggplot(data=sg_business_zscore_df,
aes(x=`Cat M Prop`)) +
geom_histogram(bins=20,
color="black",
fill="light blue") +
ggtitle("Z-score Standardisation")
ggarrange(r, s, z,
ncol = 3,
nrow = 1)m <- c( "average", "single", "complete", "ward")
names(m) <- c( "average", "single", "complete", "ward")
ac <- function(x) {
agnes(sg_business, method = x)$ac
}
map_dbl(m, ac)## average single complete ward
## 0.8845678 0.8922229 0.8896038 0.9075350
With reference to the output above, we can see that Ward’s method provides the strongest clustering structure among the four methods assessed. Hence, in the subsequent analysis, only Ward’s method will be used.
set.seed(54321)
gap_stat <- clusGap(sg_business, FUN = hcut, nstart = 25, K.max = 10, B = 50)
# Print the result
print(gap_stat, method = "firstmax")## Clustering Gap statistic ["clusGap"] from call:
## clusGap(x = sg_business, FUNcluster = hcut, K.max = 10, B = 50, nstart = 25)
## B=50 simulated reference sets, k = 1..10; spaceH0="scaledPCA"
## --> Number of clusters (method 'firstmax'): 1
## logW E.logW gap SE.sim
## [1,] 7.904633 8.665080 0.7604468 0.04103340
## [2,] 7.725283 8.360427 0.6351444 0.03845202
## [3,] 7.507726 8.229915 0.7221892 0.03267079
## [4,] 7.415381 8.129552 0.7141711 0.02971707
## [5,] 7.266590 8.046361 0.7797706 0.03046773
## [6,] 7.192193 7.974757 0.7825644 0.03056135
## [7,] 7.108255 7.914675 0.8064201 0.03002151
## [8,] 7.015753 7.858996 0.8432428 0.03013872
## [9,] 6.941829 7.805217 0.8633878 0.02932208
## [10,] 6.871708 7.753003 0.8812947 0.02814277
heatmaply(normalize(sg_business_mat),
Colv=NA,
dist_method = "euclidean",
hclust_method = "ward.D",
seriate = "OLO",
colors = Blues,
k_row = 10,
margins = c(NA,200,60,NA),
fontsize_row = 4,
fontsize_col = 5,
main="Geographic Segmentation of Singapore by Business Prominence",
xlab = "Business Prominence",
ylab = "Singapore's Planning Areas"
)## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO"): Discarded datum Unknown based on WGS84 ellipsoid in CRS definition,
## but +towgs84= values preserved
## Neighbour list object:
## Number of regions: 47
## Number of nonzero links: 212
## Percentage nonzero weights: 9.597103
## Average number of links: 4.510638
## Link number distribution:
##
## 1 2 3 4 5 6 7 8
## 1 3 8 13 10 8 1 3
## 1 least connected region:
## 44 with 1 link
## 3 most connected regions:
## 17 36 41 with 8 links
plot(planning_area_3414_sp, border=grey(.5))
plot(sg.nb, coordinates(planning_area_3414_sp), col="blue", add=TRUE)## Characteristics of weights list object:
## Neighbour list object:
## Number of regions: 47
## Number of nonzero links: 212
## Percentage nonzero weights: 9.597103
## Average number of links: 4.510638
## Link number distribution:
##
## 1 2 3 4 5 6 7 8
## 1 3 8 13 10 8 1 3
## 1 least connected region:
## 44 with 1 link
## 3 most connected regions:
## 17 36 41 with 8 links
##
## Weights style: B
## Weights constants summary:
## n nn S0 S1 S2
## B 47 2209 36488.8 24823350 168882011
## [1] "mst" "matrix"
## [1] 46 3
## [,1] [,2] [,3]
## [1,] 14 37 44.92823
## [2,] 14 2 49.55329
## [3,] 37 1 52.85235
## [4,] 1 47 61.51380
## [5,] 14 36 63.18488
## [6,] 36 30 36.39988
plot(planning_area_sp, border=gray(.5))
plot.mst(sg.mst, coordinates(planning_area_sp),
col="blue", cex.lab=0.7, cex.circles=0.005, add=TRUE)## List of 8
## $ groups : num [1:47] 1 1 1 5 1 6 1 1 1 1 ...
## $ edges.groups:List of 10
## ..$ :List of 3
## .. ..$ node: num [1:28] 47 40 46 1 35 37 17 14 3 42 ...
## .. ..$ edge: num [1:27, 1:3] 40 46 47 1 35 37 37 17 14 3 ...
## .. ..$ ssw : num 2227
## ..$ :List of 3
## .. ..$ node: num 20
## .. ..$ edge: num[0 , 1:3]
## .. ..$ ssw : num 0
## ..$ :List of 3
## .. ..$ node: num [1:6] 41 32 22 23 38 25
## .. ..$ edge: num [1:5, 1:3] 22 32 23 41 41 38 22 25 32 23 ...
## .. ..$ ssw : num 574
## ..$ :List of 3
## .. ..$ node: num 44
## .. ..$ edge: num[0 , 1:3]
## .. ..$ ssw : num 0
## ..$ :List of 3
## .. ..$ node: num [1:3] 4 29 43
## .. ..$ edge: num [1:2, 1:3] 4 29 29 43 102 ...
## .. ..$ ssw : num 156
## ..$ :List of 3
## .. ..$ node: num [1:4] 6 31 26 12
## .. ..$ edge: num [1:3, 1:3] 26 6 31 12 26 ...
## .. ..$ ssw : num 287
## ..$ :List of 3
## .. ..$ node: num 18
## .. ..$ edge: num[0 , 1:3]
## .. ..$ ssw : num 0
## ..$ :List of 3
## .. ..$ node: num 45
## .. ..$ edge: num[0 , 1:3]
## .. ..$ ssw : num 0
## ..$ :List of 3
## .. ..$ node: num 28
## .. ..$ edge: num[0 , 1:3]
## .. ..$ ssw : num 0
## ..$ :List of 3
## .. ..$ node: num 34
## .. ..$ edge: num[0 , 1:3]
## .. ..$ ssw : num 0
## $ not.prune : NULL
## $ candidates : int [1:10] 1 2 3 4 5 6 7 8 9 10
## $ ssto : num 7439
## $ ssw : num [1:10] 7439 6366 5546 4970 4501 ...
## $ crit : num [1:2] 1 Inf
## $ vec.crit : num [1:47] 1 1 1 1 1 1 1 1 1 1 ...
## - attr(*, "class")= chr "skater"
## clustergrps
## 1 2 3 4 5 6 7 8 9 10
## 28 1 6 1 3 4 1 1 1 1
plot(planning_area_sp, border=gray(.5))
plot(skaterclust, coordinates(planning_area_3414_sp), cex.lab=.7,
groups.colors=c("red","green","blue", "brown", "pink"), cex.circles=0.005, add=TRUE)## Warning in segments(coords[id1, 1], coords[id1, 2], coords[id2, 1],
## coords[id2, : "add" is not a graphical parameter
## Warning in segments(coords[id1, 1], coords[id1, 2], coords[id2, 1],
## coords[id2, : "add" is not a graphical parameter
## Warning in segments(coords[id1, 1], coords[id1, 2], coords[id2, 1],
## coords[id2, : "add" is not a graphical parameter
## Warning in segments(coords[id1, 1], coords[id1, 2], coords[id2, 1],
## coords[id2, : "add" is not a graphical parameter
groups_mat <- as.matrix(skaterclust$groups)
sg_biz_spatialcluster <- cbind(sg_biz_cluster, as.factor(groups_mat)) %>%
rename(`SP_CLUSTER`=`as.factor.groups_mat.`)
qtm(sg_biz_spatialcluster, "SP_CLUSTER")